NumPy (Numerical Python) provides a high-performance multidimensional array object, fast array-oriented arithmetic operations, and flexible broadcasting capabilities.


creating

create from list

import numpy as np
np.array([[1, 2, 3], [4, 5, 6]])
#  array([[1, 2, 3],
#         [4, 5, 6]])
np.arange(6).reshape((2,3))
#  array([[0, 1, 2],
#         [3, 4, 5]])
np.linspace(0, 1, 6).reshape((2,3)) 
#  array([[0. , 0.2, 0.4],
#         [0.6, 0.8, 1. ]])

Note the difference between np.ndarray and np.array(). The former is the array type (a class), while the latter is a function that creates arrays from other data structures such as lists.

create from scratch

import numpy as np
np.empty((2,3))
#  array([[4.89584003e-085, 4.89209922e-085, 9.64748202e-315],
#         [4.89584003e-085, 4.89209922e-085, 6.32257052e-310]])
np.zeros((2,3))
#  array([[0., 0., 0.],
#         [0., 0., 0.]])
np.ones((2,3))
#  array([[1., 1., 1.],
#         [1., 1., 1.]])
np.full((2,3), 42)
#  array([[42, 42, 42],
#         [42, 42, 42]])
np.empty((2,3)) # contents are uninitialized and therefore arbitrary (here the memory happened to hold ones)
#  array([[1., 1., 1.],
#         [1., 1., 1.]])
np.eye(3)
#  array([[1., 0., 0.],
#         [0., 1., 0.],
#         [0., 0., 1.]])

np.random

import numpy as np
np.random.seed(42)
np.random.random((2,3))
#  array([[0.37454012, 0.95071431, 0.73199394],
#         [0.59865848, 0.15601864, 0.15599452]])
np.random.normal(0, 1, (2,3))
#  array([[ 1.57921282,  0.76743473, -0.46947439],
#         [ 0.54256004, -0.46341769, -0.46572975]])
np.random.randint(0, 10, (2, 3))
#  array([[9, 5, 8],
#         [0, 9, 2]])

attributes

size, shape and type

x.ndim # number of dimensions
x.shape # size of each dimension
x.size # number of elements
x.dtype # e.g. float64

memory usage

x.itemsize # size (in bytes) of each array element
x.nbytes # total size (in bytes) of the array
x.data # buffer object (memoryview) pointing to the start of the array's data; the integer memory address is x.ctypes.data

indexing and slicing

basic

individual elements:

x[0,0] # 1st row, 1st column
x[1,1] # 2nd row, 2nd column

sub-arrays:

import numpy as np
x = np.array([[ 1., 2., 3.], [ 4., 5., 6.]])
x
#  array([[1., 2., 3.],
#         [4., 5., 6.]])
x[:2, 1:3]
#  array([[2., 3.],
#         [5., 6.]])

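Slicing returns a view on the original data, not a copy, so modifying the slice also modifies x. To make an independent copy use .copy():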

y = x[:2, 1:3].copy()

fancy

# TODO

reshaping

change sizes of dimensions

import numpy as np
np.arange(6).reshape((2,3))
#  array([[0, 1, 2],
#         [3, 4, 5]])
np.linspace(0, 1, 6).reshape((2,3)) 
#  array([[0. , 0.2, 0.4],
#         [0.6, 0.8, 1. ]])

add a dimension

import numpy as np
x = np.array([1, 2, 3, 4, 5, 6])
x
#  array([1, 2, 3, 4, 5, 6])
x.shape
#  (6,)
x.reshape((2,3))
#  array([[1, 2, 3],
#         [4, 5, 6]])
x[np.newaxis, :].shape
#  (1, 6)

flatten

import numpy as np
x = np.array([[ 1., 2., 3.], [ 4., 5., 6.]])
x
#  array([[1., 2., 3.],
#         [4., 5., 6.]])
x.shape
#  (2, 3)
x.ravel()
#  array([1., 2., 3., 4., 5., 6.])
x.ravel().shape
#  (6,)

joining

1d

import numpy as np
x = np.array([1, 2, 3]) 
y = np.array([4, 5, 6])
np.concatenate([x, y])
#  array([1, 2, 3, 4, 5, 6])

2d

import numpy as np
x = np.array([[ 1., 2., 3.], [ 4., 5., 6.]])
np.concatenate([x, x])
#  array([[1., 2., 3.],
#         [4., 5., 6.],
#         [1., 2., 3.],
#         [4., 5., 6.]])
np.vstack([x, x])
#  array([[1., 2., 3.],
#         [4., 5., 6.],
#         [1., 2., 3.],
#         [4., 5., 6.]])
import numpy as np
x = np.array([[ 1., 2., 3.], [ 4., 5., 6.]])
np.concatenate([x, x], axis=1)
#  array([[1., 2., 3., 1., 2., 3.],
#         [4., 5., 6., 4., 5., 6.]])
np.hstack([x, x])
#  array([[1., 2., 3., 1., 2., 3.],
#         [4., 5., 6., 4., 5., 6.]])

3d

Use np.dstack to stack along the third axis (for arrays that are already 3d this is the same as np.concatenate with axis=2).


splitting

1d

import numpy as np
x = np.arange(10)
a, b = np.split(x, [5])
a
#  array([0, 1, 2, 3, 4])
b
#  array([5, 6, 7, 8, 9])
import numpy as np
x = np.arange(10)
a, b, c = np.split(x, [5, 8])
a
#  array([0, 1, 2, 3, 4])
b
#  array([5, 6, 7])
c
#  array([8, 9])

2d

import numpy as np
x = np.arange(12).reshape((3,4))
a, b = np.hsplit(x, [2])
a
#  array([[0, 1],
#         [4, 5],
#         [8, 9]])
b
#  array([[ 2,  3],
#         [ 6,  7],
#         [10, 11]])
import numpy as np
x = np.arange(12).reshape((3,4))
a, b = np.vsplit(x, [2])
a
#  array([[0, 1, 2, 3],
#         [4, 5, 6, 7]])
b
#  array([[ 8,  9, 10, 11]])

products

a = np.array([[1, 0], [0, 1], [2, 4]])
b = np.array([[4, 1], [2, 2], [1, 3]])

# element-wise (i.e. Hadamard) product
a * b 

# matrix product (each entry is the dot product of a row of a with a row of b)
a @ b.T
np.dot(a, b.T)
np.matmul(a, b.T) # same as @; has different broadcasting rules from np.dot

sequences

import numpy as np
np.arange(10) # 0 to 9, step size = 1
#  array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
np.arange(0, 10, 2) # (from, up-to-not-including, step-size)
#  array([0, 2, 4, 6, 8])
np.arange(0., 10., 2.)
#  array([0., 2., 4., 6., 8.])
np.linspace(0, 8, 5) # (from, up-to-including, length)
#  array([0., 2., 4., 6., 8.])

universal functions

These are vectorized operations (also called ufuncs).

arithmetic

ufunc e.g. notes
np.add x + 5
np.subtract x - 5
np.negative -x
np.multiply x * 2
np.divide x / 2
np.floor_divide x // 2
np.power(x, 2) x ** 2
np.mod x % 2
np.abs, np.absolute abs(x)
np.sin np.sin(x)
np.cos np.cos(x)
np.tan np.tan(x)
np.arcsin np.arcsin(x)
np.arccos np.arccos(x)
np.arctan np.arctan(x)
np.exp np.exp(x) i.e. e^x
np.exp2 2 ** x
np.power(3, x) 3 ** x
np.log np.log(x) i.e. ln(x)
np.log2 np.log2(x)
np.log10 np.log10(x)
np.expm1 np.expm1(x) i.e. exp(x) - 1
np.log1p np.log1p(x) i.e. log(1 + x)

np.expm1 and np.log1p are specialized versions that are useful for maintaining precision with very small input - when x is very small, these functions give more precise values than if the raw np.log or np.exp were used.

There are lots more functions available - see also scipy.special for some stats ones.

out argument

See p. 56 of the Python Data Science Handbook.

aggregates

outer products


constants

np.inf
np.nan
np.pi
np.e